rm(list = ls(all.names = TRUE))
gc()
set.seed(21191712)
options(scipen=999)

packages <-c("tidyverse","foreign","stargazer","naniar","ri2")

new.packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)

lapply(packages, require, character.only = TRUE)
rm(packages, new.packages)

setwd("PUT YOUR WORKING DIRECTORY HERE")

# load data
dat <- foreign::read.dta("./data.dta")

#---------------#
# Data Cleaning #
#---------------#

dat$q6_years_married[dat$q6_years_married == -9] <- 0
dat$treatment_binary <- ifelse(dat$holy_endorser==1 | dat$secular_endorser==1, 1, 0)
dat$q30_combined_s <- scale(dat$q30_combined)

## select all relevant covariates and outcomes first
dat <- dat %>% 
  select(q30_combined, q30_combined_s, treatment_binary, respondent_muslim, respondent_male, 
         enumerator_muslim, enumerator_male, q1_age, q2_male, q3_kamba, q3_kikuyu, q3_luo, 
         q3_somali, q3_other, q4_religion_christian, q4_religion_muslim, q5_convert, 
         q6_years_married, q6_married, q7_has_job, q8_income_last_month, q8_has_income,
         q9_chance_become_rich, q10_plans_to_vote, q11_govt_represents_interest,
         q12_feels_on_loosing_side, q13_b_knows_emigrant_som_eri,
         q15_witnesses_interrel_violence, q16_witnesses_viol_musl_govt, q17_friend_family_died,
         q18_lost_job, q18_arrested, q18_house_of_worship_raided, q18_stopped_speaking_parents, 
         q18_love_problems, q18_friend_left_country, q18_sum, q19_rlts_mother,
         q19_not_raised_by_mom, q19_mother_dead, q20_rlts_father, q20_not_raised_by_father,
         q20_father_dead, q21_respect_friends_family, q22_gender, q22_religion, q22_tribal,
         q22_national, q22_youth, q23_religious_attendance, q23_frequents_house_worship,
         q24_deontology, q25_payment_acceptable, q26_payment_ethical, q27_radicalization, 
         q33_distance)

## check for missings
naniar::vis_miss(dat)

## following variables seems problematic, i.e., shouldn't impute at this level of missingness
table(is.na(dat$q13_a_knows_emigrant))
table(is.na(dat$q19_rlts_mother))
table(is.na(dat$q20_rlts_father))
table(is.na(dat$q23_religious_attendance))

## kick vars with lots of missingness out
dat <- dat %>% select(-q13_a_knows_emigrant, -q19_rlts_mother, -q20_rlts_father, -q23_religious_attendance)

## check missing again:
naniar::vis_miss(dat) 

## remove one missing treatment indicator (can't be imputed)
dat <- dat[which(dat$treatment_binary>-1),]

## impute rest of missings with mean 
for(i in 1:ncol(dat)){
  dat[is.na(dat[,i]), i] <- mean(dat[,i], na.rm = TRUE)
}
rm(i)

## 
dat <- dat[complete.cases(dat), ]


#--------#
# Models #
#--------#

## OLS without controls 

lm_wo_controls <- lm(q30_combined ~ treatment_binary, data=dat)

## get RI pvalue

declaration <- declare_ra(N = length(dat[,1]), m = table(dat$treatment_binary)[[2]])

dat$Z <- dat$treatment_binary
table(dat$Z)
dat$Y <- dat$q30_combined
hist(dat$Y)

ri2_lm_wo_controls <- conduct_ri(
  formula = Y ~ Z,
  declaration = declaration,
  sharp_hypothesis = 0,
  data = dat
)
summary(ri2_lm_wo_controls) ### NOTE THIS IS PROBABILISTIC, MAY BE SLIGHTLY DIFFERENT ON REPLICATORS' END




## OLS with controls 

## select controls
controls <- c("respondent_muslim", "respondent_male", 
              "enumerator_muslim", "enumerator_male", "q1_age", "q2_male", "q3_kamba", "q3_kikuyu", "q3_luo",
              "q3_somali", "q3_other", "q4_religion_christian", "q4_religion_muslim", "q5_convert",
              "q6_years_married", "q6_married", "q7_has_job", "q8_income_last_month", "q8_has_income",
              "q9_chance_become_rich", "q10_plans_to_vote", "q11_govt_represents_interest",
              "q12_feels_on_loosing_side", "q13_b_knows_emigrant_som_eri",
              "q15_witnesses_interrel_violence", "q16_witnesses_viol_musl_govt", "q17_friend_family_died",
              "q18_lost_job", "q18_arrested", "q18_house_of_worship_raided", "q18_stopped_speaking_parents",
              "q18_love_problems", "q18_friend_left_country", "q18_sum", "q19_rlts_mother",
              "q19_not_raised_by_mom", "q19_mother_dead", "q20_rlts_father", "q20_not_raised_by_father",
              "q20_father_dead", "q21_respect_friends_family", "q22_gender", "q22_religion", "q22_tribal",
              "q22_national", "q22_youth", "q23_religious_attendance", "q23_frequents_house_worship",
              "q24_deontology", "q25_payment_acceptable", "q26_payment_ethical", "q27_radicalization", "q33_distance")

## drop enumerator FEs for now, add later
controls_temp <- controls[controls != "enumerator_male"]
controls_temp <- controls_temp[controls_temp != "enumerator_muslim"]


## build model
lm_all_controls <- lm(paste("q30_combined ~ treatment_binary + ", paste(controls_temp, collapse=" + ")), data = dat)
summary(lm_all_controls)


## get RI pvalue

# get residuals
y_on_x <- lm(paste("q30_combined ~ ", paste(controls_temp, collapse=" + ")), data = dat)

# redefine Y as resid(Y) averaging out controls
dat$Y <- residuals(y_on_x)
dat$Z <- dat$treatment_binary


ri2_lm_all_controls <- conduct_ri(
  formula = Y ~ Z,
  declaration = declaration,
  sharp_hypothesis = 0,
  data = dat
)

summary(ri2_lm_all_controls)



## OLS with controls and fixed effects

## build model
lm_all_controls_fe <- lm(paste("q30_combined ~ treatment_binary + ", paste(controls, collapse=" + ")), data = dat)
summary(lm_all_controls_fe)

## get RI pvalue

# first get residuals
y_on_x <- lm(paste("q30_combined ~ ", paste(controls, collapse=" + ")), data = dat)

# redefine Y as resid(Y) averaging out controls
dat$Y <- residuals(y_on_x)
dat$Z <- dat$treatment_binary

ri2_lm_all_controls_fe <- conduct_ri(
  formula = Y ~ Z,
  declaration = declaration,
  sharp_hypothesis = 0,
  data = dat
)

summary(ri2_lm_all_controls_fe)


stargazer(lm_wo_controls, lm_all_controls, lm_all_controls_fe,
          keep = c("treatment_binary"),
          covariate.labels = c("Treatment"),
          add.lines = list(c("RI p-value", "0.053", "0.034", "0.023"),
                           c("Controls", "No", "Yes", "Yes"),
                           c("FEs", "No", "No", "Yes")),
          keep.stat = c("n", "adj.rsq"),
          digits = 3,
          dep.var.caption = "Support for violence",
          dep.var.labels = c("","",""),
          label = "tab:main",
          title = "Effect of anti-violence norms on violent attitudes",
          out = "PUT YOUR FILE PATH HERE")





